{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Verify S3 Bucket Creation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "import sagemaker\n",
    "import time\n",
    "\n",
    "# Region configured for the default boto3 session; the clients below are pinned to it.\n",
    "session = boto3.session.Session()\n",
    "region = session.region_name\n",
    "\n",
    "# SageMaker session, its default bucket, and the notebook's execution role.\n",
    "sess = sagemaker.Session()\n",
    "bucket = sess.default_bucket()\n",
    "role = sagemaker.get_execution_role()\n",
    "\n",
    "# Service clients bound to the session region.\n",
    "sm = boto3.Session().client(service_name=\"sagemaker\", region_name=region)\n",
    "s3 = boto3.Session().client(service_name=\"s3\", region_name=region)\n",
    "\n",
    "# Account id of the calling identity; it is embedded in the resource names below.\n",
    "account_id = boto3.client(\"sts\").get_caller_identity().get(\"Account\")\n",
    "\n",
    "# Names used for the MWAA environment and for its VPC stack parameter.\n",
    "airflow_env_name = \"-\".join([\"mwaa\", region, account_id])\n",
    "# NOTE(review): there is no separator between \"mwaa-vpc\" and the region - confirm this is intended.\n",
    "airflow_vpc_name = \"-\".join([\"mwaa-vpc\" + region, account_id])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start pessimistic: the flag is only set to True after the bucket check below succeeds.\n",
    "setup_s3_bucket_passed = False\n",
    "# Restore the bucket name from the %store database (presumably saved by an earlier notebook - confirm).\n",
    "%store -r airflow_bucket_name\n",
    "# Persist the generated environment and VPC names so they can be restored with `%store -r`.\n",
    "%store airflow_env_name\n",
    "%store airflow_vpc_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from botocore.exceptions import ClientError\n",
    "\n",
    "# Verify that the Airflow bucket exists and is reachable with the current credentials.\n",
    "# head_bucket raises ClientError otherwise, so the flag is only set after it returns.\n",
    "response = None\n",
    "\n",
    "try:\n",
    "    response = s3.head_bucket(Bucket=airflow_bucket_name)\n",
    "    print(response)\n",
    "    setup_s3_bucket_passed = True\n",
    "except ClientError as e:\n",
    "    # Reset the flag so a re-run after a failure cannot leave a stale True behind.\n",
    "    setup_s3_bucket_passed = False\n",
    "    # Report the region that was queried; `response` is still None when head_bucket raises.\n",
    "    print(\"[ERROR] Cannot find bucket {} in {} due to {}.\".format(airflow_bucket_name, region, e))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the result of the bucket check so it can be restored with `%store -r`.\n",
    "%store setup_s3_bucket_passed"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Configure Airflow DAG Files before uploading to S3 Bucket"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fill the placeholders of the DAG configuration in place:\n",
    "# {0} becomes the region and {1} the SageMaker default bucket.\n",
    "config_path = \"./dags/config.py\"\n",
    "\n",
    "with open(config_path, \"r\") as f:\n",
    "    config_text = f.read()\n",
    "\n",
    "config_text = config_text.replace(\"{0}\", region).replace(\"{1}\", bucket)\n",
    "\n",
    "with open(config_path, \"w\") as f:\n",
    "    f.write(config_text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Copy Airflow DAG to S3 Bucket"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Restore the three S3 destination paths from the %store database; the upload commands below use them.\n",
    "%store -r s3_mwaa_private_path\n",
    "%store -r s3_mwaa_dags_private_path\n",
    "%store -r s3_mwaa_pipeline_private_path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the pipeline directory (recursively), the DAG config and DAG file, and requirements.txt\n",
    "# to the S3 paths restored above; `$name` is expanded from the Python variable of that name.\n",
    "!aws s3 cp --recursive ./dags/pipeline $s3_mwaa_pipeline_private_path\n",
    "!aws s3 cp ./dags/config.py $s3_mwaa_dags_private_path/config.py\n",
    "!aws s3 cp ./dags/bert_reviews.py $s3_mwaa_dags_private_path/bert_reviews.py\n",
    "!aws s3 cp ./dags/requirements.txt $s3_mwaa_private_path/requirements.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# List both destination paths so the uploads can be checked by eye.\n",
    "!aws s3 ls $s3_mwaa_private_path\n",
    "!aws s3 ls $s3_mwaa_dags_private_path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Configure Temporary IAM Role Policy for MWAA VPC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The MWAA VPC creation policy is added to TeamRole afterwards, so the role\n",
    "# must have at most nine policies attached at this point.\n",
    "iam = boto3.resource(\"iam\")\n",
    "iam_client = boto3.client(\"iam\")\n",
    "\n",
    "team_role = iam.Role(\"TeamRole\")\n",
    "team_role_arn = team_role.arn\n",
    "\n",
    "aws_managed_policies = list(team_role.attached_policies.all())\n",
    "policy_count = len(aws_managed_policies)\n",
    "\n",
    "if policy_count < 10:\n",
    "    print(\"Success! Please Continue...\")\n",
    "else:\n",
    "    too_many_message = \"You have: {} policies attached to TeamRole, you need downsize to 9 Policies so that we can add an MWAA VPC Creation Policy.\"\n",
    "    print(too_many_message.format(policy_count))\n",
    "    print(\"Please do NOT continue unless until you run this and get a Success message\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the VPC policy template; the context manager closes the file handle\n",
    "# that a bare open(...).read() would leave open.\n",
    "with open(\"./src/mwaa_vpc_policy.json\", \"r\") as f:\n",
    "    mwaa_vpc_policy_template = f.read()\n",
    "\n",
    "# Fill the placeholders: {0} becomes the region and {1} the account id.\n",
    "mwaa_vpc_policy_json = mwaa_vpc_policy_template.replace(\"{0}\", region).replace(\"{1}\", account_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the TeamRole ARN so it can be restored with `%store -r`.\n",
    "%store team_role_arn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the temporary policy used for VPC creation and remember its ARN;\n",
    "# the ARN is needed again later to detach the policy from TeamRole.\n",
    "response = iam_client.create_policy(PolicyName=\"mwaa_vpc_policy\", PolicyDocument=mwaa_vpc_policy_json)\n",
    "\n",
    "mwaa_vpc_policy_arn = response[\"Policy\"][\"Arn\"]\n",
    "\n",
    "# Attach the policy to TeamRole. A policy that is only created grants nothing,\n",
    "# and the later detach_role_policy call for this ARN would have nothing to detach.\n",
    "iam_client.attach_role_policy(RoleName=\"TeamRole\", PolicyArn=mwaa_vpc_policy_arn)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create MWAA VPC Environment - Please be patient, this can take around 10 minutes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cloudformation = boto3.resource(\"cloudformation\")\n",
    "\n",
    "# Read the template through a context manager so the file handle is closed.\n",
    "with open(\"./cfn/mwaa_vpc_template.yaml\", \"r\") as f:\n",
    "    mwaa_vpc_template_yaml = f.read()\n",
    "\n",
    "response = cloudformation.create_stack(\n",
    "    StackName=\"mwaa-vpc-stack\",\n",
    "    TemplateBody=mwaa_vpc_template_yaml,\n",
    "    Parameters=[\n",
    "        {\"ParameterKey\": \"EnvironmentName\", \"ParameterValue\": airflow_vpc_name},\n",
    "    ],\n",
    "    ResourceTypes=[\n",
    "        \"AWS::EC2::VPC\",\n",
    "    ],\n",
    "    OnFailure=\"ROLLBACK\",\n",
    "    EnableTerminationProtection=False,\n",
    ")\n",
    "\n",
    "print(\"Starting deployment of VPC {}. \\n\".format(airflow_vpc_name))\n",
    "\n",
    "# Poll every 30 seconds until the stack reports CREATE_COMPLETE.\n",
    "# Any status other than CREATE_IN_PROGRESS (for example the rollback requested by\n",
    "# OnFailure=\"ROLLBACK\") means creation will not complete, so raise instead of\n",
    "# waiting forever.\n",
    "while True:\n",
    "    stack_status = cloudformation.Stack(\"mwaa-vpc-stack\").stack_status\n",
    "    if stack_status == \"CREATE_COMPLETE\":\n",
    "        break\n",
    "    if stack_status != \"CREATE_IN_PROGRESS\":\n",
    "        raise RuntimeError(\"Stack mwaa-vpc-stack was not created, current status: {}\".format(stack_status))\n",
    "    print(\"Still waiting....\")\n",
    "    time.sleep(30)\n",
    "\n",
    "print(\"\\n Sucess! VPC {} has been deployed sucessfully.\".format(airflow_vpc_name))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "vpc_outputs = cloudformation.Stack(\"mwaa-vpc-stack\").outputs\n",
    "\n",
    "# Security group id: value of the first \"IngressSecurityGroup\" output, or None when there is none.\n",
    "airflow_sg_id = next(\n",
    "    (entry[\"OutputValue\"] for entry in vpc_outputs if entry[\"OutputKey\"] == \"IngressSecurityGroup\"),\n",
    "    None,\n",
    ")\n",
    "\n",
    "# Private subnet ids, collected in the order in which the stack lists its outputs.\n",
    "subnet_index_list = [\"PrivateSubnet1\", \"PrivateSubnet2\"]\n",
    "airflow_subnet_ids = [entry[\"OutputValue\"] for entry in vpc_outputs if entry[\"OutputKey\"] in subnet_index_list]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the security group id and subnet ids read from the stack outputs\n",
    "# so they can be restored with `%store -r`.\n",
    "%store airflow_sg_id\n",
    "%store airflow_subnet_ids"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Configure IAM Role Policy for MWAA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The VPC policy was only required while the VPC stack was being created,\n",
    "# so take it off TeamRole again.\n",
    "response = iam_client.detach_role_policy(\n",
    "    RoleName=\"TeamRole\",\n",
    "    PolicyArn=mwaa_vpc_policy_arn,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The MWAA policy is added to TeamRole afterwards, so the role must have\n",
    "# at most nine policies attached at this point.\n",
    "team_role = iam.Role(\"TeamRole\")\n",
    "\n",
    "aws_managed_policies = list(team_role.attached_policies.all())\n",
    "policy_count = len(aws_managed_policies)\n",
    "\n",
    "if policy_count < 10:\n",
    "    print(\"Success! Please Continue...\")\n",
    "else:\n",
    "    too_many_message = \"You have: {0} policies attached to TeamRole, you need downsize to 9 Policies so that we can add an MWAA Policy.\"\n",
    "    print(too_many_message.format(policy_count))\n",
    "    print(\"Please do NOT continue unless until you run this and get a Success message\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the MWAA policy template; the context manager closes the file handle\n",
    "# that a bare open(...).read() would leave open.\n",
    "with open(\"./src/mwaa_policy.json\", \"r\") as f:\n",
    "    mwaa_policy_json = f.read()\n",
    "\n",
    "# Fill the placeholders: {0} region, {1} account id, {2} environment name, {3} Airflow bucket name.\n",
    "mwaa_policy_json = mwaa_policy_json.replace(\"{0}\", region)\n",
    "mwaa_policy_json = mwaa_policy_json.replace(\"{1}\", account_id)\n",
    "mwaa_policy_json = mwaa_policy_json.replace(\"{2}\", airflow_env_name)\n",
    "mwaa_policy_json = mwaa_policy_json.replace(\"{3}\", airflow_bucket_name)\n",
    "\n",
    "# The assume-role policy document is used as is, without placeholder substitution.\n",
    "with open(\"./src/mwaa_assume_policy.json\", \"r\") as f:\n",
    "    mwaa_assume_policy_json = f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the MWAA policy and keep the ARN from the create_policy response.\n",
    "created_policy = iam_client.create_policy(PolicyName=\"mwaa_policy\", PolicyDocument=mwaa_policy_json)\n",
    "mwaa_policy_arn = created_policy[\"Policy\"][\"Arn\"]\n",
    "\n",
    "# Attach the new policy to TeamRole.\n",
    "iam_client.attach_role_policy(RoleName=\"TeamRole\", PolicyArn=mwaa_policy_arn)\n",
    "\n",
    "# Replace TeamRole's assume-role (trust) policy with the document loaded from mwaa_assume_policy.json.\n",
    "response = iam_client.update_assume_role_policy(RoleName=\"TeamRole\", PolicyDocument=mwaa_assume_policy_json)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Release Resources"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%html\n",
    "\n",
    "<p><b>Shutting down your kernel for this notebook to release resources.</b></p>\n",
    "<button class=\"sm-command-button\" data-commandlinker-command=\"kernelmenu:shutdown\" style=\"display:none;\">Shutdown Kernel</button>\n",
    "        \n",
    "<script>\n",
    "try {\n",
    "    els = document.getElementsByClassName(\"sm-command-button\");\n",
    "    els[0].click();\n",
    "}\n",
    "catch(err) {\n",
    "    // NoOp\n",
    "}    \n",
    "</script>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%javascript\n",
    "\n",
    "try {\n",
    "    Jupyter.notebook.save_checkpoint();\n",
    "    Jupyter.notebook.session.delete();\n",
    "}\n",
    "catch(err) {\n",
    "    // NoOp\n",
    "}"
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
